with mapview

Author

Tony Duan

This document demonstrates how to create an interactive map of accidents in China using the mapview package in R. The data is sourced from the chinese_incident_tracker GitHub repository, which contains a collection of incident reports.

1 Data Acquisition and Preparation

First, we load the necessary R libraries for data manipulation, web scraping, and visualization.

Code
# Load the tidyverse package for data manipulation and visualization.
library(tidyverse)
# Load the rvest package for web scraping.
library(rvest)
# Load the jsonlite package for working with JSON data.
library(jsonlite)
# Load the curl package for making HTTP requests.
library(curl)
# Load the lubridate package for working with dates and times.
library(lubridate)
# Load the plotly package for creating interactive plots.
library(plotly)
# Load the ggplot2 package for creating static plots.
library(ggplot2)
# Load the openxlsx package for reading and writing Excel files.
library(openxlsx)
# Load the readxl package for reading Excel files.
library(readxl)

1.1 Loading a Single File

We start by loading a single JSON file to understand its structure.

Code
# You can uncomment these lines to download the file from the URL.
# url <- "https://raw.githubusercontent.com/percent4/chinese_incident_tracker/main/elk/data/00105670-df53-4be8-9039-8a07fe2d2b4d.json"
# download.file(url,"./data/SAFI.json", mode = "wb")
Code
# Parse the downloaded incident file; the result is kept as an R list.
data001 <- jsonlite::read_json(path = "./data/SAFI.json")
Code
# Flatten the parsed list into a one-row data frame.
data002 <- data001 %>% as.data.frame()
# Name the coordinate columns. Column 14 holds the x value (about 113.7 for
# this Dongguan incident), so it is the longitude; column 15 (about 23.1) is
# the latitude. This is the same order used when all files are loaded later.
colnames(data002)[14:15] <- c("longitude", "latitude")
# Get a glimpse of the data frame.
glimpse(data002)
Rows: 1
Columns: 22
$ item_id            <chr> "00105670-df53-4be8-9039-8a07fe2d2b4d"
$ news_channel       <chr> "新闻坊"
$ title              <chr> "一养老院突发火灾,已致3死10伤!"
$ report             <chr> "4月4日\n\n“东莞应急管理”\n\n发布情况通报\n\n↓↓↓\n\n4月4日4时21分,东莞万…
$ original_websites  <chr> "https://mp.weixin.qq.com/s/xUZR8gZ_N0UqGj0GvH6L8A"
$ start_date         <chr> "2024-04-04"
$ start_time         <chr> "2024-04-04 04:21:00"
$ update_time        <chr> "2024-05-03 10:50:01"
$ incident_type      <chr> "火灾"
$ incident_reason    <chr> "该建筑为一栋11层楼房,起火部位为第三层303房,过火面积约20平方米。"…
$ person_death_num   <int> 3
$ person_injury_num  <int> 10
$ person_missing_num <int> 0
$ latitude           <dbl> 113.6952
$ longitude          <dbl> 23.07464
$ place              <chr> "东莞万江街道康怡护理院(公助民办养老院)"
$ province           <chr> "广东省"
$ city               <chr> "东莞市"
$ county             <chr> ""
$ town               <chr> "万江街道"
$ village            <chr> ""
$ is_final           <lgl> TRUE

1.2 Loading All Files

Now, we will load all the JSON files from the repository.

Code
# You can uncomment these lines to download the entire repository as a zip file.
# url="https://github.com/percent4/chinese_incident_tracker/archive/refs/heads/main.zip"
# download.file(url,"./data/data.zip", mode = "wb")
Code
# You can uncomment this line to unzip the downloaded file.
# unzip("./data/data.zip",exdir="./data/out")
Code
# Copy the JSON files from the unzipped folder to a new folder.
from <- "./data/out/chinese_incident_tracker-main/elk/data"
to   <- "./data/json_folder"
# file.copy() can only copy several files into a directory that already
# exists, so create the target first. This is a no-op (and silent) when the
# folder is already there.
dir.create(to, recursive = TRUE, showWarnings = FALSE)
# Returns one logical per source file: TRUE when that file was copied.
file.copy(list.files(from, full.names = TRUE),
          to,
          recursive = TRUE)
 [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
Code
# You can uncomment this line to list the files in the new folder.
# list.files("./data/json_folder")
Code
# Paths of every incident file in the folder.
json_files <- list.files("./data/json_folder", full.names = TRUE)

# Read one incident file and return it as a one-row data frame.
read_incident <- function(path) {
  incident <- as.data.frame(read_json(path))
  # Columns 14 and 15 hold the coordinates: x (longitude) first, then y.
  # NOTE(review): positional renaming assumes every file has the same field
  # order as the sample file -- confirm against the data source.
  colnames(incident)[14:15] <- c("Longitude", "Latitude")
  incident
}

# Build all rows first and bind them once. Growing a data frame with rbind()
# inside a loop copies the accumulated data on every iteration.
# bind_rows() on an empty list yields an empty tibble, matching the old
# starting value when the folder contains no files.
all_data <- bind_rows(lapply(json_files, read_incident))
Code
# Add two derived columns:
#   text       - map label: incident type plus death and injury counts
#   month_year - "YYYY-MM" key taken from start_date, used for the charts
all_data <- all_data %>%
  mutate(
    text = paste0(
      incident_type, " ", "死亡人数:", person_death_num,
      " 受伤人数:", person_injury_num
    ),
    month_year = format_ISO8601(ymd(start_date), precision = "ym")
  )
Code
# Print a compact, column-by-column overview of the combined data.
all_data %>% glimpse()
Rows: 60
Columns: 24
$ item_id            <chr> "00105670-df53-4be8-9039-8a07fe2d2b4d", "0683ba1e-7…
$ news_channel       <chr> "新闻坊", "中国新闻网", "凤凰网", "搜狐网", "每日经济新闻", "中国新闻网", "杭…
$ title              <chr> "一养老院突发火灾,已致3死10伤!", "江西于都一矿企发生事故 致3死2伤", "#云南镇雄突发山…
$ report             <chr> "4月4日\n\n“东莞应急管理”\n\n发布情况通报\n\n↓↓↓\n\n4月4日4时21分,东莞万…
$ original_websites  <chr> "https://mp.weixin.qq.com/s/xUZR8gZ_N0UqGj0GvH6L8A"…
$ start_date         <chr> "2024-04-04", "2024-04-24", "2024-01-22", "2024-02-…
$ start_time         <chr> "2024-04-04 04:21:00", "2024-04-24 00:08:48", "2024…
$ update_time        <chr> "2024-05-03 10:50:01", "2024-05-03 00:08:48", "2024…
$ incident_type      <chr> "火灾", "溃水", "山体滑坡", "房屋坍塌", "燃气爆炸", "火灾", "摔伤", "摔伤…
$ incident_reason    <chr> "该建筑为一栋11层楼房,起火部位为第三层303房,过火面积约20平方米。", "企业清理排洪隧洞时溃…
$ person_death_num   <int> 3, 3, 44, 1, 7, 4, 1, 1, 7, 1, 18, 4, 5, 1, 15, 12,…
$ person_injury_num  <int> 10, 2, 2, 0, 27, 4, 0, 0, 0, 11, 1155, 0, 3, 2, 44,…
$ person_missing_num <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 6, 0, …
$ Longitude          <dbl> 113.69524, 115.32158, 104.95514, 113.28304, 116.808…
$ Latitude           <dbl> 23.07464, 25.72126, 27.49063, 32.22574, 39.95258, 4…
$ place              <chr> "东莞万江街道康怡护理院(公助民办养老院)", "于都县龙鑫矿业有限公司选矿厂", "云南省昭通市镇雄…
$ province           <chr> "广东省", "江西省", "云南省", "湖北省", "河北省", "内蒙古自治区", "新疆维吾尔…
$ city               <chr> "东莞市", "赣州市", "昭通市", "随州市", "廊坊市", "呼和浩特市", "伊犁哈萨克自…
$ county             <chr> "", "于都县", "镇雄县", "随县", "三河市", "新城区", "富蕴县", "富蕴县",…
$ town               <chr> "万江街道", "祁禄山镇", "塘房镇", "万和镇", "燕郊镇", "", "", "", "宁…
$ village            <chr> "", "金沙村", "", "", "小张各庄", "", "", "", "乔家沟", "", "…
$ is_final           <lgl> TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRU…
$ text               <chr> "火灾 死亡人数:3 受伤人数:10", "溃水 死亡人数:3 受伤人数:2", "山体滑坡 死亡人数…
$ month_year         <chr> "2024-04", "2024-04", "2024-01", "2024-02", "2024-0…
Code
# You can uncomment this line to save the data to an Excel file.
# write.xlsx(all_data,'all_data.xlsx')

2 Charting the Data

Now, let’s create some charts to visualize the data.

2.1 Grouping by Month

We’ll group the data by month and summarize the number of deaths and injuries.

Code
# Monthly totals: one row per month_year with summed deaths and injuries.
chartdata001 <- summarise(
  group_by(all_data, month_year),
  person_death_num = sum(person_death_num),
  person_injury_num = sum(person_injury_num)
)

2.2 Reshaping the Data

We’ll reshape the data from a wide format to a long format for easier plotting.

Code
# Long format for the grouped chart: one row per month and measure, with the
# measure name in `type` and its monthly total in `DATA`.
chartdata002 <- pivot_longer(
  select(chartdata001, month_year, person_death_num, person_injury_num),
  cols = !month_year,
  names_to = "type",
  values_to = "DATA"
)

2.3 Creating the Charts

We’ll create a bar chart of the number of deaths per month and a grouped bar chart of both deaths and injuries.

Code
# Monthly deaths as red bars, each labelled with its value.
gg <- ggplot(
  data = chartdata001,
  mapping = aes(x = month_year, y = person_death_num, label = person_death_num)
) +
  geom_bar(stat = "identity", fill = "red") +
  geom_text(vjust = -1, position = position_dodge(width = 0.9)) +
  theme_bw()

# Render the static ggplot as an interactive plotly widget.
ggplotly(gg)
Code
# Deaths and injuries side by side for each month, on a log10 y-axis.
gg <- ggplot(
  data = chartdata002,
  mapping = aes(x = month_year, y = DATA, fill = type)
) +
  geom_col(position = "dodge") +
  geom_text(
    mapping = aes(label = DATA),
    vjust = 1.5,
    position = position_dodge(width = 0.9)
  ) +
  scale_y_log10() +
  theme_light()

# Wrap the ggplot in a plotly widget for interactivity.
pp <- ggplotly(gg)

# Show the interactive chart.
pp

3 Mapping the Data

Finally, we’ll create an interactive map of the accidents using the mapview package.

Code
# Shorten each report to at most 100 characters (the trailing ellipsis that
# str_trunc() appends counts toward that width).
all_data2 <- mutate(all_data, report = str_trunc(report, width = 100))
Code
# You can uncomment this line to save the data to an Excel file.
# write.xlsx(all_data2,'all_data2.xlsx')
Code
# Load the mapview, sf, and stringr packages.
library(mapview)
library(sf)
library(stringr)

# Create an interactive map of the accidents:
#   label - hover text taken from the `text` column
#   zcol  - points coloured by incident type
#   cex   - point size scaled by the number of deaths
mapview(all_data2,
        map.types = "OpenStreetMap",
        label = "text",
        xcol = "Longitude",
        ycol = "Latitude",
        zcol = "incident_type",
        cex = "person_death_num",
        # EPSG:4326 (WGS 84) is the global lon/lat reference system. The
        # previous value, 4269, is NAD83, a datum defined for North America.
        # NOTE(review): assumes the source coordinates are WGS 84 -- confirm,
        # since some Chinese map providers publish GCJ-02 coordinates.
        crs = 4326,
        grid = FALSE)

4 Resources

Here are some useful resources for learning more about the tools used in this document.

https://github.com/percent4/chinese_incident_tracker

https://github.com/r-spatial/mapview

https://maps.clb.org.hk/

Back to top